Aggregated and atomic scores per method
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
#>
#> Attaching package: 'plotly'
#> The following object is masked from 'package:ggplot2':
#>
#> last_plot
#> The following object is masked from 'package:stats':
#>
#> filter
#> The following object is masked from 'package:graphics':
#>
#> layout
# datasets = read_yaml("datasets.yml")
# print(score_file)
# datasets = read_yaml("datasets.yml")
# datasets = read_yaml(file_dataset)
# Locate the late-integration score files. Where they live depends on which
# workflow engine launched this script, detected from the working directory.
list_wd <- strsplit(getwd(), "/")[[1]]
if (list_wd[length(list_wd)] == "hadaca3_framework") {
  # Snakemake script: the current working dir is hadaca3_framework.
  score_files <- list(list.files(path = "./output/scores/", full.names = TRUE))
} else {
  # Nextflow script: files are looked up in the current directory.
  # `pattern` is a regular expression, not a shell glob: the former
  # 'score-li*' meant "score-l" followed by any number of "i", anywhere in the
  # name. Anchor it so only names starting with "score-li" are kept.
  score_files <- list(list.files(pattern = "^score-li"))
}
# Empty results table: one character column per pipeline component parsed from
# a score file name, followed by one numeric column per score.
results_li <- local({
  component_columns <- c(
    "dataset", "ref",
    "preprocessing_mixRNA", "feature_selection_mixRNA",
    "preprocessing_RNA", "feature_selection_RNA",
    "preprocessing_scRNA", "feature_selection_scRNA",
    "deconvolution_rna",
    "preprocessing_mixMET", "feature_selection_mixMET",
    "preprocessing_MET", "feature_selection_MET",
    "deconvolution_met",
    "late_integration"
  )
  score_columns <- c(
    "aid", "aid_norm",
    "aitchison", "aitchison_norm",
    "jsd", "jsd_norm",
    "mae", "mae_norm",
    "pearson_col", "pearson_col_norm",
    "pearson_row", "pearson_row_norm",
    "pearson_tot", "pearson_tot_norm",
    "rmse", "rmse_norm",
    "score_aggreg",
    "sdid", "sdid_norm",
    "spearman_col", "spearman_col_norm",
    "spearman_row", "spearman_row_norm",
    "spearman_tot", "spearman_tot_norm"
  )
  empty_columns <- c(
    setNames(
      replicate(length(component_columns), character(), simplify = FALSE),
      component_columns
    ),
    setNames(
      replicate(length(score_columns), numeric(), simplify = FALSE),
      score_columns
    )
  )
  # Same call as spelling every `name = type()` argument out by hand.
  do.call(data.frame, empty_columns)
})
# Read every score file and append one row per file to results_li: the
# pipeline components parsed from the file name, bound to the scores stored in
# the file.
#
# One capture group per component, in this order:
#   dataset, ref,
#   mixRNA preprocessing / feature selection,
#   RNA preprocessing / feature selection,
#   scRNA preprocessing / feature selection, RNA deconvolution,
#   mixMET preprocessing / feature selection,
#   MET preprocessing / feature selection, MET deconvolution,
#   late integration.
# The extension dot is escaped and the pattern anchored at the end so that only
# a literal ".h5" suffix is accepted.
score_name_pattern <- "score-li-(.+)_(.+)_mixRNA_(.+)_(.+)_RNA_(.+)_(.+)_scRNA_(.+)_(.+)_(.+)_mixMET_(.+)_(.+)_MET_(.+)_(.+)_(.+)_(.+)\\.h5$"
component_names <- c(
  "dataset", "ref",
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA", "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET", "deconvolution_met",
  "late_integration"
)

score_paths <- score_files[[1]]
# Collect the rows in a preallocated list and bind them once after the loop;
# calling rbind() on every iteration copies the whole table each time.
score_rows <- vector("list", length(score_paths))
i <- 0 # ends up holding the number of files read, as before
for (i in seq_along(score_paths)) {
  score_file <- score_paths[[i]]
  # Extract the base name of the file
  base_name <- basename(score_file)
  # Extract components from the file name (columns 2:16 are the 15 groups)
  components <- str_match(base_name, score_name_pattern)[2:16]
  if (anyNA(components)) {
    warning(
      "File name does not follow the score-li layout: ", base_name,
      call. = FALSE
    )
  }
  scores <- read_hdf5(score_file)
  score_rows[[i]] <- cbind(
    data.frame(
      as.list(setNames(components, component_names)),
      stringsAsFactors = FALSE
    ),
    scores
  )
}
results_li <- do.call(rbind, c(list(results_li), score_rows))
rownames(results_li) <- NULL
# Median aggregated score of each late-integration method, best first.
results_li |>
  group_by(late_integration) |>
  summarise(GlobalScore = median(score_aggreg)) |>
  arrange(desc(GlobalScore))
#> # A tibble: 3 × 2
#> late_integration GlobalScore
#> <chr> <dbl>
#> 1 OnlyMet 0.663
#> 2 limeanRMSE 0.660
#> 3 OnlyRna 0.646
# Median aggregated score of every complete pipeline, i.e. of every distinct
# combination of the 13 method columns, sorted from best to worst.
# `.groups` is an argument of summarise(): given to group_by() it is read as a
# new column definition and adds a constant grouping column named ".groups".
# NOTE(review): despite its name, this table keeps every combination rather
# than the first five -- confirm whether a head(5) is intended.
results_li_top5 <- results_li %>%
  group_by(
    preprocessing_mixRNA, feature_selection_mixRNA,
    preprocessing_RNA, feature_selection_RNA,
    preprocessing_scRNA, feature_selection_scRNA, deconvolution_rna,
    preprocessing_mixMET, feature_selection_mixMET,
    preprocessing_MET, feature_selection_MET, deconvolution_met,
    late_integration
  ) %>%
  summarise(GlobalScore = median(score_aggreg), .groups = "keep") %>%
  arrange(desc(GlobalScore))
#> `summarise()` has grouped output by 'preprocessing_mixRNA',
#> 'feature_selection_mixRNA', 'preprocessing_RNA', 'feature_selection_RNA',
#> 'preprocessing_scRNA', 'feature_selection_scRNA', 'deconvolution_rna',
#> 'preprocessing_mixMET', 'feature_selection_mixMET', 'preprocessing_MET',
#> 'feature_selection_MET', 'deconvolution_met', 'late_integration'. You can
#> override using the `.groups` argument.
# Turn the data-identifying columns into factors. Levels follow the order in
# which each value first appears in results_li.
all_data_used <- c("dataset", "ref")
results_li[all_data_used] <- lapply(
  results_li[all_data_used],
  function(column) factor(column, levels = unique(column))
)
# Names of the 13 method columns that together describe one pipeline.
all_functions_li <- c(
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA", "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET", "deconvolution_met",
  "late_integration"
)

# Turn every method column into a factor whose levels are sorted by decreasing
# aggregated score on the in vitro dataset.
# The ranking is computed on the invitro1 rows only, so it must also be applied
# to the invitro1 rows only: order() returns positions within that subset, not
# within the full column.
invitro_rows <- which(results_li$dataset == "invitro1")
invitro_ranking <- order(
  results_li$score_aggreg[invitro_rows],
  decreasing = TRUE
)
for (fun in all_functions_li) {
  method_values <- as.character(results_li[[fun]])
  ranked_values <- method_values[invitro_rows][invitro_ranking]
  # Methods never run on invitro1 are appended after the ranked ones so that no
  # value falls outside the levels and silently becomes NA.
  results_li[[fun]] <- factor(
    method_values,
    levels = unique(c(ranked_values, method_values))
  )
}
# Interactive table of every pipeline with its aggregated score.
index_aggreg <- which(names(results_li) == "score_aggreg")
# The method columns sit right after the two leading columns, hence the
# offset of 2.
method_column_index <- seq_along(all_functions_li) + 2L
datatable(
  results_li[, c(method_column_index, index_aggreg)],
  extensions = "Buttons",
  options = list(
    pageLength = 10,
    # "B" adds the Buttons extension to the table layout.
    dom = "Bfrtip",
    buttons = list(
      list(
        extend = "colvis",
        text = "Show/Hide Columns",
        # Every column except the first one can be toggled.
        columns = ":not(:first-child)"
      )
    )
  )
)
Visualisations of the top 5 methods
# Load the ground-truth matrix of the dataset used for the visualisations.
test_dataset <- "invitro1"
# Split the dataset identifier into its name (trailing digits removed) and its
# number (digits only).
name_part <- gsub("[0-9]+$", "", test_dataset)
number_part <- gsub("[^0-9]", "", test_dataset)
# File name layout: groundtruth<number>_<name>_pdac.h5
ground_truth_name_file <- paste0(
  "groundtruth", number_part, "_", name_part, "_pdac.h5"
)
ground_truth <- read_hdf5(ground_truth_name_file)$groundtruth
print(ground_truth)
#> COMT02TFZP COMT05TFZP COMT13TFZP COMT17TFZP COMT28TFZP COMT09TFZP
#> endo 0.27930342 0.1346876324 0.15188568 0.15486036 0.13274656 0.1119107593
#> fibro 0.34660545 0.3824721705 0.42652665 0.44660194 0.42781906 0.5567950102
#> immune 0.08661531 0.1346876324 0.11086336 0.09888529 0.09067407 0.0894806285
#> classic 0.03208865 0.3471947859 0.28674866 0.28670742 0.02807638 0.0009595778
#> basal 0.25538717 0.0009577787 0.02397564 0.01294498 0.32068393 0.2408540242
#> COMT30TFZP COMT11TFZP COMT16TFZP COMT14TFZP COMT24TFZP COMT12TFZP
#> endo 0.22966060 0.09939095 0.1793829 0.12084882 0.1045731 0.1440144
#> fibro 0.48280241 0.43257242 0.4673523 0.49154778 0.5172227 0.5719772
#> immune 0.06290808 0.12538366 0.1225783 0.12288694 0.1080669 0.0379838
#> classic 0.00000000 0.00000000 0.2306864 0.18283179 0.2701372 0.1899790
#> basal 0.22462891 0.34265298 0.0000000 0.08188467 0.0000000 0.0560456
#> COMT18TFZP COMT01TFZP COMT23TFZP
#> endo 0.15858108 0.29755109 0.13487915
#> fibro 0.44455683 0.38391542 0.45655093
#> immune 0.12039880 0.06208194 0.04193842
#> classic 0.23227251 0.05011260 0.36663150
#> basal 0.04419078 0.20633894 0.00000000
# Build the prediction file name of every pipeline listed in results_li_top5
# and load the matching prediction matrices.
# File name layout: pred-li-<dataset>_ref_<13 method names joined by "_">.h5
# The dataset comes from test_dataset so that it always matches the ground
# truth loaded above, and the method columns are selected by name rather than
# by position.
prediction_file <- vapply(
  seq_len(nrow(results_li_top5)), # safe when the table has no row
  function(i) {
    method_row <- as.data.frame(results_li_top5[i, all_functions_li])
    method_names <- vapply(method_row, as.character, character(1))
    paste0(
      "pred-li-",
      paste(c(test_dataset, "ref", method_names), collapse = "_"),
      ".h5"
    )
  },
  character(1)
)
# Keep only the `pred` entry of each file.
pred <- lapply(prediction_file, \(path) read_hdf5(path)$pred)
print(pred)
#> [[1]]
#> COMT02TFZP COMT05TFZP COMT13TFZP COMT17TFZP COMT28TFZP COMT09TFZP
#> endo 0.212188608 0.150847656 0.127830728 0.17012495 0.168477547 0.1663772111
#> fibro 0.719241791 0.474346124 0.580474900 0.45870095 0.723880347 0.8079226100
#> immune 0.006486137 0.007965471 0.008504063 0.01116058 0.012229579 0.0052679876
#> classic 0.006577870 0.051431954 0.037384023 0.05170145 0.009316453 0.0008474217
#> basal 0.055505595 0.315408795 0.245806286 0.30831207 0.086096073 0.0195847696
#> COMT30TFZP COMT11TFZP COMT16TFZP COMT14TFZP COMT24TFZP COMT12TFZP
#> endo 0.1948986745 0.1175873835 0.17861790 0.13702482 0.127773504 0.162219257
#> fibro 0.7824100972 0.8581618874 0.54249650 0.66881880 0.600778429 0.627234600
#> immune 0.0057202599 0.0054064206 0.01122914 0.01051585 0.007918624 0.006656194
#> classic 0.0008059356 0.0007668087 0.03752967 0.02574475 0.036322560 0.027877206
#> basal 0.0161650328 0.0180774998 0.23012679 0.15789579 0.227206883 0.176012743
#> COMT18TFZP COMT01TFZP COMT23TFZP
#> endo 0.20550658 0.252505856 0.12826958
#> fibro 0.44919772 0.658817451 0.55497968
#> immune 0.02497565 0.006192471 0.00689947
#> classic 0.04799767 0.010561337 0.04120383
#> basal 0.27232237 0.071922884 0.26864744
#>
#> [[2]]
#> COMT02TFZP COMT05TFZP COMT13TFZP COMT17TFZP COMT28TFZP COMT09TFZP
#> endo 0.212188608 0.150847656 0.127830728 0.17012495 0.168477547 0.1663772111
#> fibro 0.719241791 0.474346124 0.580474900 0.45870095 0.723880347 0.8079226100
#> immune 0.006486137 0.007965471 0.008504063 0.01116058 0.012229579 0.0052679876
#> classic 0.006577870 0.051431954 0.037384023 0.05170145 0.009316453 0.0008474217
#> basal 0.055505595 0.315408795 0.245806286 0.30831207 0.086096073 0.0195847696
#> COMT30TFZP COMT11TFZP COMT16TFZP COMT14TFZP COMT24TFZP COMT12TFZP
#> endo 0.1948986745 0.1175873835 0.17861790 0.13702482 0.127773504 0.162219257
#> fibro 0.7824100972 0.8581618874 0.54249650 0.66881880 0.600778429 0.627234600
#> immune 0.0057202599 0.0054064206 0.01122914 0.01051585 0.007918624 0.006656194
#> classic 0.0008059356 0.0007668087 0.03752967 0.02574475 0.036322560 0.027877206
#> basal 0.0161650328 0.0180774998 0.23012679 0.15789579 0.227206883 0.176012743
#> COMT18TFZP COMT01TFZP COMT23TFZP
#> endo 0.20550658 0.252505856 0.12826958
#> fibro 0.44919772 0.658817451 0.55497968
#> immune 0.02497565 0.006192471 0.00689947
#> classic 0.04799767 0.010561337 0.04120383
#> basal 0.27232237 0.071922884 0.26864744
#>
#> [[3]]
#> COMT02TFZP COMT05TFZP COMT13TFZP COMT17TFZP COMT28TFZP COMT09TFZP
#> endo 0.212188608 0.150847656 0.127830728 0.17012495 0.168477547 0.1663772111
#> fibro 0.719241791 0.474346124 0.580474900 0.45870095 0.723880347 0.8079226100
#> immune 0.006486137 0.007965471 0.008504063 0.01116058 0.012229579 0.0052679876
#> classic 0.006577870 0.051431954 0.037384023 0.05170145 0.009316453 0.0008474217
#> basal 0.055505595 0.315408795 0.245806286 0.30831207 0.086096073 0.0195847696
#> COMT30TFZP COMT11TFZP COMT16TFZP COMT14TFZP COMT24TFZP COMT12TFZP
#> endo 0.1948986745 0.1175873835 0.17861790 0.13702482 0.127773504 0.162219257
#> fibro 0.7824100972 0.8581618874 0.54249650 0.66881880 0.600778429 0.627234600
#> immune 0.0057202599 0.0054064206 0.01122914 0.01051585 0.007918624 0.006656194
#> classic 0.0008059356 0.0007668087 0.03752967 0.02574475 0.036322560 0.027877206
#> basal 0.0161650328 0.0180774998 0.23012679 0.15789579 0.227206883 0.176012743
#> COMT18TFZP COMT01TFZP COMT23TFZP
#> endo 0.20550658 0.252505856 0.12826958
#> fibro 0.44919772 0.658817451 0.55497968
#> immune 0.02497565 0.006192471 0.00689947
#> classic 0.04799767 0.010561337 0.04120383
#> basal 0.27232237 0.071922884 0.26864744
#>
#> [[4]]
#> COMT02TFZP COMT05TFZP COMT13TFZP COMT17TFZP COMT28TFZP COMT09TFZP
#> endo 0.212188608 0.150847656 0.127830728 0.17012495 0.168477547 0.1663772111
#> fibro 0.719241791 0.474346124 0.580474900 0.45870095 0.723880347 0.8079226100
#> immune 0.006486137 0.007965471 0.008504063 0.01116058 0.012229579 0.0052679876
#> classic 0.006577870 0.051431954 0.037384023 0.05170145 0.009316453 0.0008474217
#> basal 0.055505595 0.315408795 0.245806286 0.30831207 0.086096073 0.0195847696
#> COMT30TFZP COMT11TFZP COMT16TFZP COMT14TFZP COMT24TFZP COMT12TFZP
#> endo 0.1948986745 0.1175873835 0.17861790 0.13702482 0.127773504 0.162219257
#> fibro 0.7824100972 0.8581618874 0.54249650 0.66881880 0.600778429 0.627234600
#> immune 0.0057202599 0.0054064206 0.01122914 0.01051585 0.007918624 0.006656194
#> classic 0.0008059356 0.0007668087 0.03752967 0.02574475 0.036322560 0.027877206
#> basal 0.0161650328 0.0180774998 0.23012679 0.15789579 0.227206883 0.176012743
#> COMT18TFZP COMT01TFZP COMT23TFZP
#> endo 0.20550658 0.252505856 0.12826958
#> fibro 0.44919772 0.658817451 0.55497968
#> immune 0.02497565 0.006192471 0.00689947
#> classic 0.04799767 0.010561337 0.04120383
#> basal 0.27232237 0.071922884 0.26864744
#>
#> [[5]]
#> COMT02TFZP COMT05TFZP COMT13TFZP COMT17TFZP COMT28TFZP COMT09TFZP
#> endo 0.22169845 0.14793173 0.13284560 0.16195740 0.17076846 0.174322828
#> fibro 0.68567831 0.41853911 0.51528989 0.41147527 0.68671679 0.783467525
#> immune 0.01222161 0.01609737 0.01639415 0.01969765 0.02145217 0.010342698
#> classic 0.01371836 0.07703053 0.06045139 0.07693695 0.01902355 0.003366243
#> basal 0.06668327 0.34040127 0.27501897 0.32993272 0.10203904 0.028500706
#> COMT30TFZP COMT11TFZP COMT16TFZP COMT14TFZP COMT24TFZP COMT12TFZP
#> endo 0.207479997 0.123191172 0.17728547 0.13924459 0.13218774 0.16566077
#> fibro 0.753543597 0.835627697 0.49097641 0.61506088 0.53972682 0.57543942
#> immune 0.011279409 0.010764967 0.01999623 0.01871931 0.01572625 0.01363989
#> classic 0.003062516 0.003543112 0.06050784 0.04269593 0.05895694 0.04742543
#> basal 0.024634480 0.026873051 0.25123405 0.18427929 0.25340226 0.19783449
#> COMT18TFZP COMT01TFZP COMT23TFZP
#> endo 0.19802445 0.24664876 0.13208414
#> fibro 0.39859610 0.63426640 0.49002753
#> immune 0.03554026 0.01338949 0.01421802
#> classic 0.07145061 0.02030218 0.06574376
#> basal 0.29638858 0.08539318 0.29792654